library(EnvStats)
library(tidyverse)
library(readxl)
library(haven)
library(fixest)
library(splines)
library(ggsci)
library(ggthemes)
library(srvyr)
library(survey)
library(arsenal)

set.seed(24)

mycontrols_notests  <- tableby.control(test=FALSE, total=FALSE,
                                       numeric.stats=c("meansd", "medianq1q3", "range"),
                                       cat.stats=c("countpct"),
                                       stats.labels=list(countpct="N (%)", meansd="Mean (SD)", medianq1q3="Median (IQR)", range = "Range"),
                                       digits = 0L,
                                       digits.pct = 0L,
                                       digits.p = 3L,
                                       pfootnote = F)



# Data -----

kenya_pop <-
  read_xlsx("~/BurkeLab Dropbox/Carlos Gould/In Praise/Data/WPP2022_GEN_F01_DEMOGRAPHIC_INDICATORS_COMPACT_REV1.xlsx",sheet = 2) %>% 
  filter(`Region, subregion, country or area *`=="Kenya") %>% 
  dplyr::select(`Region, subregion, country or area *`, Year, 
                `Total Population, as of 1 January (thousands)`, 
                `Total Deaths (thousands)`,
                `Crude Death Rate (deaths per 1,000 population)`) %>% 
  rename(
    population = `Total Population, as of 1 January (thousands)`,
    total_deaths = `Total Deaths (thousands)`,
    death_rate = `Crude Death Rate (deaths per 1,000 population)`
  ) %>% 
  mutate(
    population = as.numeric(population)*1000,
    total_deaths = as.numeric(total_deaths)*1000,
    death_rate = as.numeric(death_rate)
  )


kenya_cooking <-
  read_xlsx("~/BurkeLab Dropbox/Carlos Gould/In Praise/Data/DHS_HH_Fuel_data_2023.xlsx") %>% 
  filter(`Country Name`=="Kenya" &
           `Survey Year` == 2019) 

kenya_cooking %>% filter(Indicator=="Households using clean fuel for cooking")
kenya_cooking %>% filter(Indicator=="Households using solid fuel for cooking")


# PRICE SENSITIVITY ------

# From correspondence with Matt Shupler Nov 3, 2023 -----

# This drove down the change in LPG consumption - the new estimate is that consumption decreased 
# by 0.55 kg/capita/month (0.82 to 0.27 kg/capita/month) between 2021 and 2022 (not 0.11 kg/capita/month, 
# as I stated in the pre-print).

# I also received additional pricing data from PayGo Energy that PAYG LPG prices increased 
# from 214 Kenyan Shilling (KSh)/kg ($1.53USD/kg) to 249 KSh/kg ($1.78USD/kg) in 
# 2023 (not the 180 to 214 KSh as stated in the pre-print), 
# but the relative change in price is nearly the same (34 vs 35 KSh) 
# so I don’t think that would influence the results.

# 35 KSh ~ 0.55

# 35/214
# price increased by: 0.1635514
# 0.1635514
# consumption declined by
# 0.55/0.82
# 0.6707317

# ergo price elasticity = 0.6707317/0.1635514
# 4.101045

# From Shupler et al. 2022:

# As PayGo Energy passed the added cost of the VAT onto their customers, the 
# PAYG LPG price increased by 34 KSh (180 to 214 KSh/kg) between 
# July-October 2020 and the same months in 2021.

# This was accompanied by a (statistically insignificant) decrease 
# (difference:-0.11 kg/capita/month 95%CI:[-0.30, 0.07]) in 
# average monthly per capita PAYG LPG consumption during the same
# period (Table 4)

# 34 KSh ~ -0.11 kg/capita/month

# in percentage terms:
# price increased by 
# 34/180
# 0.1888889
# consumption declined by
# 0.11/0.91
# 0.1208791

# ergo price elasticity : 0.1208791 / 0.1888889
# 0.6399481

# Elsewhere, Shupler reports that wealthier households were less likely 
# to alter their LPG consumption as a result of the VAT reintroduction., e.g.,
# In the 1.5-9 kg/capita/year group, ~55% reduced LPG
# consumption due to VAT reintroduction. 9.1-15, 50%, 15.1-21, 48%,
# 21-48 30%

# elsewhere: 
# (N=32) (N=316) (N=504) 
#   Exclusive primary secondary 
# kg/lpg/percapita/yr 
# median and iqr 

# 24.0
# [14.0, 36.0]
# 14.4
# [10.4, 24.0]
# 9.0
# [6.0, 14.4]


# similarly, we can recover: 
#   
#   19.69 (15.81, 24.51) = <0.85/kg
#   –0.05 = U$0.86–1.00 kg–1
#   –0.16 = US$1.01–1.10 kg–1
#   –0.36 = >US$1.10 kg–1

# KENYA VSL: 203,671 (2019) ------
# Kenya fNRB: 0.611 -------

# OK, generate hypothetical population distribution of kg lpg / capita / yr -----
# Of LPG users:
32/(32+316+504) # 3.8% are exclusive users
316/(32+316+504) # 37% are primary but not exclusive users
504/(32+316+504) # 59% are secondary. In other words, 59% of solid fuel users also use gas

# (N=32) (N=316) (N=504) 
#   Exclusive primary secondary 
# kg/lpg/percapita/yr 
# median and iqr 

# 24.0
# [14.0, 36.0]
# 14.4
# [10.4, 24.0]
# 9.0
# [6.0, 14.4]
# Set your parameters

# Exclusive users
exclusive_lpg_log_median <- log(24)       # Replace with your desired median
exclusive_lpg_log_q1 <- log(14)          # Replace with your desired 25th percentile
exclusive_lpg_log_q3 <- log(36*2.2)          # Replace with your desired 75th percentile
exclusive_lpg_num_observations <- 32   # Replace with the desired number of observations

exclusive_lpg_mu <- exclusive_lpg_log_median
exclusive_lpg_sigma <- (exclusive_lpg_log_q3 - exclusive_lpg_log_q1) / (2 * qnorm(0.75) - 2 * qnorm(0.25))

# Generate random data with the specified median, Q1, Q3
# set.seed(123)  # For reproducibility
exclusive_lpg_hypothetical_population <- rlnorm(
  exclusive_lpg_num_observations, 
  meanlog = exclusive_lpg_mu, 
  sdlog = exclusive_lpg_sigma)

# Check the median, Q1, and Q3 of the generated data
median(exclusive_lpg_hypothetical_population)
quantile(exclusive_lpg_hypothetical_population, probs = c(0.25, 0.75))

# Primary users
primary_lpg_log_median <- log(14.4)       # Replace with your desired median
primary_lpg_log_q1 <- log(10.4)         # Replace with your desired 25th percentile
primary_lpg_log_q3 <- log(24.4*2.2)          # Replace with your desired 75th percentile
primary_lpg_num_observations <- 316   # Replace with the desired number of observations

# Calculate the mean and standard deviation for the normal distribution
# based on the desired percentiles
primary_lpg_mu <- primary_lpg_log_median
primary_lpg_sigma <- (primary_lpg_log_q3 - primary_lpg_log_q1) / (2 * qnorm(0.75) - 2 * qnorm(0.25))

# Generate random data with the specified median, Q1, Q3
  # For reproducibility
primary_lpg_hypothetical_population <- rlnorm(primary_lpg_num_observations, 
                                              meanlog = primary_lpg_mu, 
                                              sdlog = primary_lpg_sigma)

# Check the median, Q1, and Q3 of the generated data
median(primary_lpg_hypothetical_population)
quantile(primary_lpg_hypothetical_population, probs = c(0.25, 0.75))
hist(primary_lpg_hypothetical_population)

# Secondary users
secondary_lpg_log_median <- log(9)       # Replace with your desired median
secondary_lpg_log_q1 <- log(6)       # Replace with your desired 25th percentile
secondary_lpg_log_q3 <- log(14.4*2)           # Replace with your desired 75th percentile
secondary_lpg_num_observations <- 504   # Replace with the desired number of observations

# Calculate the mean and standard deviation for the normal distribution
# based on the desired percentiles
secondary_lpg_mu <- secondary_lpg_log_median
secondary_lpg_sigma <- (secondary_lpg_log_q3 - secondary_lpg_log_q1) / (2 * qnorm(0.75) - 2 * qnorm(0.25))

# Generate random data with the specified median, Q1, Q3
  # For reproducibility
secondary_lpg_hypothetical_population <- rlnorm(
  secondary_lpg_num_observations, 
  meanlog = secondary_lpg_mu, 
  sdlog = secondary_lpg_sigma)

# Check the median, Q1, and Q3 of the generated data
median(secondary_lpg_hypothetical_population)
quantile(secondary_lpg_hypothetical_population, probs = c(0.25, 0.75))

# Generate population wide -------

# Roughly 13.4 million households in Kenya
# People per household = 4.3
# Population size = ~55 million

32/(32+316+504) # 3.8% are exclusive users
316/(32+316+504) # 37% are primary but not exclusive users
504/(32+316+504) # 59% are secondary. In other words, 59% of solid fuel users also use gas

# In total, 24% of households are primary LPG users. 
# Of this group, 24*.038 = exclusive. 

# So: 
# 0.9% of Kenyans are exclusive LPG users. 
  # Exclusive LPG users consume LPG like this:
ggplot(exclusive_lpg_hypothetical_population %>% 
         as_tibble(),
       aes(x=value)) + 
  geom_density(adjust=1, bw=5) 
# 23.1% are primary LPG, secondary biomass.
  # they consume LPG like this:
ggplot(primary_lpg_hypothetical_population %>% 
         as_tibble(),
       aes(x=value)) + 
  geom_density(adjust=1, bw=5) 

combined_lpg_population <- secondary_lpg_hypothetical_population %>% 
  as_tibble() %>% 
  bind_rows(
    primary_lpg_hypothetical_population %>% 
      as_tibble()
  ) %>% 
  bind_rows(
    exclusive_lpg_hypothetical_population %>% 
      as_tibble()
  )
# 44.8% are primary biomass, secondary LPG
  # they consume LPG like this:

ggplot() + 
  geom_density(data=combined_lpg_population, aes(x=value), adjust=1, bw=10)

  ggplot() + 
    geom_density(data=secondary_lpg_hypothetical_population %>% 
                   as_tibble(),
                 aes(x=value), adjust=1, bw=10, linetype="dotted") +
    geom_density(data=primary_lpg_hypothetical_population %>% 
                   as_tibble(),
                 aes(x=value), adjust=1, bw=10, linetype="dashed") +
    geom_density(data=exclusive_lpg_hypothetical_population %>% 
                   as_tibble(),
                 aes(x=value), adjust=1, bw=10, linetype="solid") + 
    xlab("LPG consumption kg/capita/year")
  
# 31.2% are exclusive biomass users
  # they consume 0 kg LPG/capita/yr



# PM2.5 EXPOSURES ------

# EASTERN SSA
#                   FEMALE          MALE          CHILD
# Gas/electric	    102 (34, 314)	  73 (24, 226)	89 (30, 273)
# Traditional wood	388 (115, 1122)	279 (83, 808)	337 (100, 976)

# TABLE 3 Comparison of modeled and measured 24-h PM2.5
# exposures (μg/m3
# )
# Modeled Measured
# LPG
# Mean 41 43
# Median 19 29
# 25th–75th percentile 10–41 27–46
# n 10 000 19
# Charcoal
# Mean 80 115
# Median 43 110
# 25th–75th percentile 21–92 50–121
# n 10 000 7
# Wood
# Mean 296 225
# Median 207 182
# 25th–75th percentile 113–386 104–292
# n 10 000 21

# Exclusive users PM2.5 exposure -------

# results: mean = 41, sd=36
# Define the target statistics
# exclusive_pm_mean_val <- 41  # Replace with your desired mean
# exclusive_pm_median_val <- 19  # Replace with your desired median
# exclusive_pm_percentile_25 <- 10  # Replace with your desired 25th percentile
# exclusive_pm_percentile_75 <- 41  # Replace with your desired 75th percentile
# 
# # Calculate the log-normal parameters
# exclusive_pm_sigma_sq <- (log(exclusive_pm_percentile_75) - log(exclusive_pm_percentile_25)) / (qnorm(0.75)-qnorm(0.25))^2
# exclusive_pm_mu <- log(exclusive_pm_mean_val) - 0.5 * exclusive_pm_sigma_sq
# 
# # Generate a large random sample from the log-normal distribution
# exclusive_pm_n_samples <- 100000  # Adjust the sample size as needed
# exclusive_pm_lognormal_sample <- rlnorm(
#   exclusive_pm_n_samples, 
#   meanlog = exclusive_pm_mu, 
#   sdlog = sqrt(exclusive_pm_sigma_sq))
# 
# # Truncate the sample to include only values within the desired percentile range
# exclusive_pm_truncated_sample <- 
#   exclusive_pm_lognormal_sample[exclusive_pm_lognormal_sample >= 0 &
#                                   exclusive_pm_lognormal_sample <= 250]
# 
# # Plot a histogram of the truncated sample
# hist(exclusive_pm_truncated_sample, breaks = 50, main = "Truncated Log-Normal Distribution", xlab = "Value")
# mean(exclusive_pm_truncated_sample)
# sd(exclusive_pm_truncated_sample)
# quantile(exclusive_pm_truncated_sample, probs=c(.25, .75))
# 
# 
# 
# # Primary users PM2.5 exposure -------
# 
# # results: mean = 85, sd=70
# # Define the target statistics
# primary_pm_mean_val <- 85  # Replace with your desired mean
# primary_pm_median_val <- 60  # Replace with your desired median
# primary_pm_percentile_25 <- 25  # Replace with your desired 25th percentile
# primary_pm_percentile_75 <- 90  # Replace with your desired 75th percentile
# 
# # Calculate the log-normal parameters
# primary_pm_sigma_sq <- (log(primary_pm_percentile_75) - log(primary_pm_percentile_25)) / (qnorm(0.75)-qnorm(0.25))^2
# primary_pm_mu <- log(primary_pm_mean_val) - 0.5 * primary_pm_sigma_sq
# 
# # Generate a large random sample from the log-normal distribution
# primary_pm_n_samples <- 100000  # Adjust the sample size as needed
# primary_pm_lognormal_sample <- 
#   rlnorm(primary_pm_n_samples, 
#          meanlog = primary_pm_mu, 
#          sdlog = sqrt(primary_pm_sigma_sq))
# 
# # Truncate the sample to include only values within the desired percentile range
# primary_pm_truncated_sample <- 
#   primary_pm_lognormal_sample[primary_pm_lognormal_sample >= 0 & 
#                                 primary_pm_lognormal_sample <= 500]
# 
# # Plot a histogram of the truncated sample
# hist(primary_pm_truncated_sample, breaks = 50, main = "Truncated Log-Normal Distribution", xlab = "Value")
# mean(primary_pm_truncated_sample)
# sd(primary_pm_truncated_sample)
# quantile(primary_pm_truncated_sample, probs=c(.25, .75))
# 
# 
# # Secondary users PM2.5 exposure -------
# 
# # results: mean = 85, sd=70
# # Define the target statistics
# secondary_pm_mean_val <- 85  # Replace with your desired mean
# secondary_pm_median_val <- 60  # Replace with your desired median
# secondary_pm_percentile_25 <- 25  # Replace with your desired 25th percentile
# secondary_pm_percentile_75 <- 90  # Replace with your desired 75th percentile
# 
# # Calculate the log-normal parameters
# secondary_pm_sigma_sq <- 
#   (log(secondary_pm_percentile_75) - log(secondary_pm_percentile_25)) / (qnorm(0.75)-qnorm(0.25))^2
# secondary_pm_mu <- log(secondary_pm_mean_val) - 0.5 * secondary_pm_sigma_sq
# 
# # Generate a large random sample from the log-normal distribution
# secondary_pm_n_samples <- 100000  # Adjust the sample size as needed
# secondary_pm_lognormal_sample <- 
#   rlnorm(secondary_pm_n_samples, 
#          meanlog = secondary_pm_mu, 
#          sdlog = sqrt(secondary_pm_sigma_sq))
# 
# # Truncate the sample to include only values within the desired percentile range
# secondary_pm_truncated_sample <- 
#   secondary_pm_lognormal_sample[secondary_pm_lognormal_sample >= 0 & 
#                                   secondary_pm_lognormal_sample <= 500]
# 
# # Plot a histogram of the truncated sample
# hist(secondary_pm_truncated_sample, breaks = 50, main = "Truncated Log-Normal Distribution", xlab = "Value")
# mean(secondary_pm_truncated_sample)
# sd(secondary_pm_truncated_sample)
# quantile(secondary_pm_truncated_sample, probs=c(.25, .75))



# PM2.5 exposure curves -------
min1 = rnorm(1, 35, 5)
max1 = rnorm(1, 250, 25)
decline1 = rnorm(1, 18, 2.5)
decline2 = rnorm(1, 13, 2.5)
exp1 = rnorm(1, 3, .05)
exp2 = rnorm(1, 5, .1)

x=1:55
PM25_1=min1 + (max1-min1) / (1 + (x/decline1)^exp1)
PM25_2=min1 + (max1-min1) / (1 + (x/decline2)^exp1)
PM25_3=max1 - ((max1 - min1) / max(x))*x

kg_lpg_pm25 <- tibble(
  kg_lpg = 1:55,
  PM25_1 = PM25_1,
  PM25_2 = PM25_2,
  PM25_3 = PM25_3
)

ggplot() +
  geom_line(data=kg_lpg_pm25, aes(x=kg_lpg, PM25_1)) +
  geom_line(data=kg_lpg_pm25, aes(x=kg_lpg, PM25_2), linetype="dotted") +
  geom_line(data=kg_lpg_pm25, aes(x=kg_lpg, PM25_3), linetype="dashed") +
  coord_cartesian(ylim=c(0, 300))



# Price sensitivities ------

# Among those that are exclusive LPG users
sens3 = 4.101045/10 # >30 kg/capita/yr
sens2 = 4.101045 # 15-30 kg/capita/yr
sens1 = 4.101045*1.1 # <15 kg/capita/yr

elasticity1 = rnorm(1, sens1, .025)
elasticity2 = rnormTrunc(1, sens2, .5, 0.1, elasticity1)
elasticity3 = rnormTrunc(1, sens3, .0025, 0.01, elasticity2)

base_price = 1.49138
price16 = base_price + base_price*.16 

percent_price_change_16 = (price16 - base_price) / price16

base_price = 1.49138
price8 = base_price + base_price*.08

percent_price_change_8 = (price8 - base_price) / price8

# percent_price_change = 0.1379306

combined_lpg_population1 <-
  combined_lpg_population %>%
  mutate(kg_lpg = value) %>%
  mutate(
    kg_lpg_1_8 =
      ifelse(kg_lpg < 15,
             kg_lpg -
               (kg_lpg * (percent_price_change_16 * elasticity1)),
             ifelse(
               kg_lpg >= 15 & kg_lpg < 30,
               kg_lpg -
                 (kg_lpg * (percent_price_change_16 * elasticity2)),
               ifelse(
                 kg_lpg >= 30 & kg_lpg < 45,
                 kg_lpg -
                   (kg_lpg * (percent_price_change_16 * elasticity3)),
                 ifelse(kg_lpg >= 45, kg_lpg, NA)
               )
             )
      ),
    kg_lpg_1_16 =
      ifelse(kg_lpg < 15,
             kg_lpg -
               (kg_lpg * (percent_price_change_8 * elasticity1)),
             ifelse(
               kg_lpg >= 15 & kg_lpg < 30,
               kg_lpg -
                 (kg_lpg * (percent_price_change_8 * elasticity2)),
               ifelse(
                 kg_lpg >= 30 & kg_lpg < 45,
                 kg_lpg -
                   (kg_lpg * (percent_price_change_8 * elasticity3)),
                 ifelse(kg_lpg >= 45, kg_lpg, NA)
               )
             )
      ),
    kg_lpg_2_16 = kg_lpg -
      (kg_lpg * (percent_price_change_16 * elasticity2)),
    kg_lpg_2_8 = kg_lpg -
      (kg_lpg * (percent_price_change_8 * elasticity2)),
    kg_lpg_3_16 = kg_lpg -
      (kg_lpg * (percent_price_change_16 * elasticity3)),
    kg_lpg_3_8 = kg_lpg -
      (kg_lpg * (percent_price_change_8 * elasticity3))
  ) %>%
  mutate(
    kg_lpg_1_16 = ifelse(kg_lpg_1_16 < 0, 0, kg_lpg_1_16),
    kg_lpg_1_8 = ifelse(kg_lpg_1_8 < 0, 0, kg_lpg_1_8),
    kg_lpg_2_16 = ifelse(kg_lpg_2_16 < 0, 0, kg_lpg_1_16),
    kg_lpg_2_8 = ifelse(kg_lpg_2_8 < 0, 0, kg_lpg_1_8),
    kg_lpg_3_16 = ifelse(kg_lpg_3_16 < 0, 0, kg_lpg_1_16),
    kg_lpg_3_8 = ifelse(kg_lpg_3_8 < 0, 0, kg_lpg_1_8),
  )


# New distribution of LPG consumption -------


lpg_densities_fig <- 
  ggplot() + 
  geom_density(data=combined_lpg_population1,
               aes(x=round(kg_lpg, digits=0)), adjust=1.05, bw=10) +
  geom_density(data=combined_lpg_population1,
               aes(x=round(kg_lpg_1_16, digits=0)), adjust=1.05, bw=10) + 
  geom_density(data=combined_lpg_population1,
               aes(x=round(kg_lpg_1_8, digits=0)), adjust=1, bw=10, linetype="dashed") + 
  geom_density(data=combined_lpg_population1,
               aes(x=round(kg_lpg_2_16, digits=0)), linetype="dotted", adjust=1, bw=10) + 
  geom_density(data=combined_lpg_population1,
               aes(x=round(kg_lpg_2_8, digits=0)), linetype="twodash", adjust=1, bw=10) + 
  geom_density(data=combined_lpg_population1,
               aes(x=round(kg_lpg_3_16, digits=0)), linetype="twodash", adjust=1, bw=10) + 
  geom_density(data=combined_lpg_population1,
               aes(x=round(kg_lpg_3_8, digits=0)), linetype="twodash", adjust=1, bw=10) + 
  xlab("Yearly LPG consumption per capita") + 
  ylab("density") + 
  theme_clean() +
  theme(plot.background = element_blank(),
        plot.title = element_blank())


specific_points <- seq(1, 55, 1)

observed <- ggplot_build(lpg_densities_fig)$data[[1]]
pred_2023_1_16 <- ggplot_build(lpg_densities_fig)$data[[2]]
pred_2023_1_8 <- ggplot_build(lpg_densities_fig)$data[[3]]
pred_2023_2_16 <- ggplot_build(lpg_densities_fig)$data[[4]]
pred_2023_2_8 <- ggplot_build(lpg_densities_fig)$data[[5]]
pred_2023_3_16 <- ggplot_build(lpg_densities_fig)$data[[6]]
pred_2023_3_8 <- ggplot_build(lpg_densities_fig)$data[[7]]

observed_density_values <- 
  approx(observed$x, observed$y, xout = specific_points)$y

pred_2023_1_16_density_values <- 
  approx(pred_2023_1_16$x, pred_2023_1_16$y, xout = specific_points)$y
pred_2023_1_8_density_values <- 
  approx(pred_2023_1_8$x, pred_2023_1_8$y, xout = specific_points)$y
pred_2023_2_16_density_values <- 
  approx(pred_2023_2_16$x, pred_2023_2_16$y, xout = specific_points)$y
pred_2023_2_8_density_values <- 
  approx(pred_2023_2_8$x, pred_2023_2_8$y, xout = specific_points)$y
pred_2023_3_16_density_values <- 
  approx(pred_2023_3_16$x, pred_2023_3_16$y, xout = specific_points)$y
pred_2023_3_8_density_values <- 
  approx(pred_2023_3_8$x, pred_2023_3_8$y, xout = specific_points)$y

density_2019_2023 <- 
  bind_cols(
    specific_points,
    observed_density_values,
    pred_2023_1_16_density_values,
    pred_2023_1_8_density_values,
    pred_2023_2_16_density_values,
    pred_2023_2_8_density_values,
    pred_2023_3_16_density_values,
    pred_2023_3_8_density_values
  ) %>% 
  as_tibble() %>% 
  rename(
    kg_lpg = `...1`,
    observed_density_values = `...2`,
    pred_2023_1_16_density_values = `...3`,
    pred_2023_1_8_density_values = `...4`,
    pred_2023_2_16_density_values = `...5`,
    pred_2023_2_8_density_values = `...6`,
    pred_2023_3_16_density_values = `...7`,
    pred_2023_3_8_density_values = `...8`
  )  %>% 
  mutate(
    sum_observed_density_values = sum(observed_density_values, na.rm=T),
    sum_pred_2023_1_16_density_values = sum(pred_2023_1_16_density_values, na.rm=T),
    sum_pred_2023_2_16_density_values = sum(pred_2023_2_16_density_values, na.rm=T),
    sum_pred_2023_3_16_density_values = sum(pred_2023_3_16_density_values, na.rm=T),
    sum_pred_2023_1_8_density_values = sum(pred_2023_1_8_density_values, na.rm=T),
    sum_pred_2023_2_8_density_values = sum(pred_2023_2_8_density_values, na.rm=T),
    sum_pred_2023_3_8_density_values = sum(pred_2023_3_8_density_values, na.rm=T),
  ) %>% 
  mutate(
    ratio_sum_observed_density_values = 1/sum_observed_density_values,
    ratio_sum_pred_2023_1_16_density_values = 1/sum_pred_2023_1_16_density_values,
    ratio_sum_pred_2023_2_16_density_values = 1/sum_pred_2023_2_16_density_values,
    ratio_sum_pred_2023_3_16_density_values = 1/sum_pred_2023_3_16_density_values,
    
    ratio_sum_pred_2023_1_8_density_values = 1/sum_pred_2023_1_8_density_values,
    ratio_sum_pred_2023_2_8_density_values = 1/sum_pred_2023_2_8_density_values,
    ratio_sum_pred_2023_3_8_density_values = 1/sum_pred_2023_3_8_density_values
  ) %>% 
  mutate(
    observed_density_values_1=observed_density_values*ratio_sum_observed_density_values,
    
    pred_2023_1_16_density_values_1=pred_2023_1_16_density_values*ratio_sum_pred_2023_1_16_density_values,
    pred_2023_2_16_density_values_2=pred_2023_2_16_density_values*ratio_sum_pred_2023_2_16_density_values,
    pred_2023_3_16_density_values_3=pred_2023_3_16_density_values*ratio_sum_pred_2023_3_16_density_values,
    
    pred_2023_1_8_density_values_1=pred_2023_1_8_density_values*ratio_sum_pred_2023_1_8_density_values,
    pred_2023_2_8_density_values_2=pred_2023_2_8_density_values*ratio_sum_pred_2023_2_8_density_values,
    pred_2023_3_8_density_values_3=pred_2023_3_8_density_values*ratio_sum_pred_2023_3_8_density_values
  )

# write_rds(density_2019_2023, "~/Downloads/kenya_kg_vat_density_2019_2023.rds")


  
# COMBINE KG LPG DENSITIES WITH PM25 SCENARIOS ------
kenya_kg_lpg_pm <- 
  density_2019_2023 %>% 
  left_join(kg_lpg_pm25)

# GENERATE FULL YEAR BY YEAR SCENARIOS --------

# 31.2% of Kenyans do not use any LPG

kenya_kg_pm_scenario <- 
  expand_grid(2023:2030,
              kenya_kg_lpg_pm) %>%
  rename(Year = `2023:2030`) %>%
  mutate(VSL = 230000,
         VSL_low = 230000*2/3,
         VSL_high = 230000*4/3) %>%
  left_join(kenya_pop) %>%
  mutate(lpg_fraction = (1-.312) * population) %>%
  mutate(
    observed_pop = lpg_fraction * observed_density_values_1,
    
    pred_1_16_pop = lpg_fraction * pred_2023_1_16_density_values_1,
    pred_2_16_pop = lpg_fraction * pred_2023_2_16_density_values_2,
    pred_3_16_pop = lpg_fraction * pred_2023_3_16_density_values_3,
    
    pred_1_8_pop = lpg_fraction * pred_2023_1_8_density_values_1,
    pred_2_8_pop = lpg_fraction * pred_2023_2_8_density_values_2,
    pred_3_8_pop = lpg_fraction * pred_2023_3_8_density_values_3,
    
  )



# CALCULATE DAMAGES ------

kenya_kg_pm_scenario_1 <-
  kenya_kg_pm_scenario %>%
  mutate(PM25_1 = PM25_1 - 2.4,
         PM25_2 = PM25_2 - 2.4,
         PM25_3 = PM25_3 - 2.4) %>%
  mutate(rho1 = log(1 + (PM25_1 / 1.6)) / (1 + exp((15.5 - PM25_1) / 36.8)),
         rho2 = log(1 + (PM25_2 / 1.6)) / (1 + exp((15.5 - PM25_2) / 36.8)),
         rho3 = log(1 + (PM25_3 / 1.6)) / (1 + exp((15.5 - PM25_3) / 36.8))) %>%
  mutate(hazard1 = (0.1430) * rho1,
         hazard2 = (0.1430) * rho2,
         hazard3 = (0.1430) * rho3) %>%
  mutate(deathrate1 = (1 - (1 / exp(hazard1))) * death_rate,
         deathrate2 = (1 - (1 / exp(hazard2))) * death_rate,
         deathrate3 = (1 - (1 / exp(hazard3))) * death_rate) %>%
  mutate(
    observed_deaths1 = (deathrate1 * observed_pop) / 1000,
    
    pred_1_16_deaths1 = (deathrate1 * pred_1_16_pop) / 1000,
    pred_2_16_deaths1 = (deathrate1 * pred_2_16_pop) / 1000,
    pred_3_16_deaths1 = (deathrate1 * pred_3_16_pop) / 1000,
    
    pred_1_8_deaths1 = (deathrate1 * pred_1_8_pop) / 1000,
    pred_2_8_deaths1 = (deathrate1 * pred_2_8_pop) / 1000,
    pred_3_8_deaths1 = (deathrate1 * pred_3_8_pop) / 1000,
    
    observed_deaths2 = (deathrate2 * observed_pop) / 1000,
    pred_1_16_deaths2 = (deathrate2 * pred_1_16_pop) / 1000,
    pred_2_16_deaths2 = (deathrate2 * pred_2_16_pop) / 1000,
    pred_3_16_deaths2 = (deathrate2 * pred_3_16_pop) / 1000,
    
    pred_1_8_deaths2 = (deathrate2 * pred_1_8_pop) / 1000,
    pred_2_8_deaths2 = (deathrate2 * pred_2_8_pop) / 1000,
    pred_3_8_deaths2 = (deathrate2 * pred_3_8_pop) / 1000,
    
    observed_deaths3 = (deathrate3 * observed_pop) / 1000,
    pred_1_16_deaths3 = (deathrate3 * pred_1_16_pop) / 1000,
    pred_2_16_deaths3 = (deathrate3 * pred_2_16_pop) / 1000,
    pred_3_16_deaths3 = (deathrate3 * pred_3_16_pop) / 1000,
    
    pred_1_8_deaths3 = (deathrate3 * pred_1_8_pop) / 1000,
    pred_2_8_deaths3 = (deathrate3 * pred_2_8_pop) / 1000,
    pred_3_8_deaths3 = (deathrate3 * pred_3_8_pop) / 1000,
    
  ) %>% 
  mutate(
    kg_lpg_consumed_observed = kg_lpg*observed_pop,
    kg_lpg_consumed_1_16 = kg_lpg*pred_1_16_pop,
    kg_lpg_consumed_2_16 = kg_lpg*pred_2_16_pop,
    kg_lpg_consumed_3_16 = kg_lpg*pred_3_16_pop,
    kg_lpg_consumed_1_8 = kg_lpg*pred_1_8_pop,
    kg_lpg_consumed_2_8 = kg_lpg*pred_2_8_pop,
    kg_lpg_consumed_3_8 = kg_lpg*pred_3_8_pop,
  )

kenya_kg_pm_scenario_1_yr <-
  kenya_kg_pm_scenario_1 %>%
  fgroup_by(Year) %>% 
  fsummarise(
    observed_pop = fmean(observed_pop, na.rm=T),
    
    pred_1_16_pop = fmean(pred_1_16_pop, na.rm=T),
    pred_2_16_pop = fmean(pred_2_16_pop, na.rm=T),
    pred_3_16_pop = fmean(pred_3_16_pop, na.rm=T),
    
    pred_1_8_pop = fmean(pred_1_8_pop, na.rm=T),
    pred_2_8_pop = fmean(pred_2_8_pop, na.rm=T),
    pred_3_8_pop = fmean(pred_3_8_pop, na.rm=T),
    
    PM25_1_16 = fmean(PM25_1, na.rm=T, w=pred_1_16_pop),
    PM25_2_16 = fmean(PM25_2, na.rm=T, w=pred_1_16_pop),
    PM25_3_16 = fmean(PM25_3, na.rm=T, w=pred_1_16_pop),
    
    PM25_1_8 = fmean(PM25_1, na.rm=T, w=pred_1_8_pop),
    PM25_2_8 = fmean(PM25_2, na.rm=T, w=pred_1_8_pop),
    PM25_3_8 = fmean(PM25_3, na.rm=T, w=pred_1_8_pop),
    
    kg_lpg_consumed_observed = fsum(kg_lpg_consumed_observed),
    kg_lpg_consumed_1_16 = fsum(kg_lpg_consumed_1_16),
    kg_lpg_consumed_2_16 = fsum(kg_lpg_consumed_2_16),
    kg_lpg_consumed_3_16 = fsum(kg_lpg_consumed_3_16),
    kg_lpg_consumed_1_8 = fsum(kg_lpg_consumed_1_8),
    kg_lpg_consumed_2_8 = fsum(kg_lpg_consumed_2_8),
    kg_lpg_consumed_3_8 = fsum(kg_lpg_consumed_3_8),
    
    VSL = min(VSL, na.rm=T),
    VSL_low = min(VSL_low, na.rm=T),
    VSL_high = min(VSL_high, na.rm=T),
    
    observed_deaths1 = fsum(observed_deaths1),
    pred_1_16_deaths1 = fsum(pred_1_16_deaths1),
    pred_2_16_deaths1 = fsum(pred_2_16_deaths1),
    pred_3_16_deaths1 = fsum(pred_3_16_deaths1),
    
    pred_1_8_deaths1 = fsum(pred_1_8_deaths1),
    pred_2_8_deaths1 = fsum(pred_2_8_deaths1),
    pred_3_8_deaths1 = fsum(pred_3_8_deaths1),
    
    observed_deaths2 = fsum(observed_deaths2),
    pred_1_16_deaths2 = fsum(pred_1_16_deaths2),
    pred_2_16_deaths2 = fsum(pred_2_16_deaths2),
    pred_3_16_deaths2 = fsum(pred_3_16_deaths2),
    
    pred_1_8_deaths2 = fsum(pred_1_8_deaths2),
    pred_2_8_deaths2 = fsum(pred_2_8_deaths2),
    pred_3_8_deaths2 = fsum(pred_3_8_deaths2),
    
    observed_deaths3 = fsum(observed_deaths3),
    pred_1_16_deaths3 = fsum(pred_1_16_deaths3),
    pred_2_16_deaths3 = fsum(pred_2_16_deaths3),
    pred_3_16_deaths3 = fsum(pred_3_16_deaths3),
    
    pred_1_8_deaths3 = fsum(pred_1_8_deaths3),
    pred_2_8_deaths3 = fsum(pred_2_8_deaths3),
    pred_3_8_deaths3 = fsum(pred_3_8_deaths3)
  ) %>%
  mutate(
    pred_1_16_deaths_difference1 = observed_deaths1 - pred_1_16_deaths1,
    pred_1_8_deaths_difference1 = observed_deaths1 - pred_1_8_deaths1,
    pred_2_16_deaths_difference1 = observed_deaths1 - pred_2_16_deaths1,
    pred_2_8_deaths_difference1 = observed_deaths1 - pred_2_8_deaths1,
    pred_3_16_deaths_difference1 = observed_deaths1 - pred_2_16_deaths1,
    pred_3_8_deaths_difference1 = observed_deaths1 - pred_2_8_deaths1,
    
    pred_1_16_deaths_difference2 = observed_deaths2 - pred_1_16_deaths2,
    pred_1_8_deaths_difference2 = observed_deaths2 - pred_1_8_deaths2,
    pred_2_16_deaths_difference2 = observed_deaths2 - pred_2_16_deaths2,
    pred_2_8_deaths_difference2 = observed_deaths2 - pred_2_8_deaths2,
    pred_3_16_deaths_difference2 = observed_deaths2 - pred_2_16_deaths2,
    pred_3_8_deaths_difference2 = observed_deaths2 - pred_2_8_deaths2,
    
    pred_1_16_deaths_difference3 = observed_deaths3 - pred_1_16_deaths3,
    pred_1_8_deaths_difference3 = observed_deaths3 - pred_1_8_deaths3,
    pred_2_16_deaths_difference3 = observed_deaths3 - pred_2_16_deaths3,
    pred_2_8_deaths_difference3 = observed_deaths3 - pred_2_8_deaths3,
    pred_3_16_deaths_difference3 = observed_deaths3 - pred_2_16_deaths3,
    pred_3_8_deaths_difference3 = observed_deaths3 - pred_2_8_deaths3
  ) %>%
  mutate(
    # Differences scaled by VSL
    pred_1_16_deaths_difference1_VSL = pred_1_16_deaths_difference1 * VSL,
    pred_1_8_deaths_difference1_VSL = pred_1_8_deaths_difference1 * VSL,
    pred_2_16_deaths_difference1_VSL = pred_2_16_deaths_difference1 * VSL,
    pred_2_8_deaths_difference1_VSL = pred_2_8_deaths_difference1 * VSL,
    pred_3_16_deaths_difference1_VSL = pred_3_16_deaths_difference1 * VSL,
    pred_3_8_deaths_difference1_VSL = pred_3_8_deaths_difference1 * VSL,
    
    pred_1_16_deaths_difference2_VSL = pred_1_16_deaths_difference2 * VSL,
    pred_1_8_deaths_difference2_VSL = pred_1_8_deaths_difference2 * VSL,
    pred_2_16_deaths_difference2_VSL = pred_2_16_deaths_difference2 * VSL,
    pred_2_8_deaths_difference2_VSL = pred_2_8_deaths_difference2 * VSL,
    pred_3_16_deaths_difference2_VSL = pred_3_16_deaths_difference2 * VSL,
    pred_3_8_deaths_difference2_VSL = pred_3_8_deaths_difference2 * VSL,
    
    pred_1_16_deaths_difference3_VSL = pred_1_16_deaths_difference3 * VSL,
    pred_1_8_deaths_difference3_VSL = pred_1_8_deaths_difference3 * VSL,
    pred_2_16_deaths_difference3_VSL = pred_2_16_deaths_difference3 * VSL,
    pred_2_8_deaths_difference3_VSL = pred_2_8_deaths_difference3 * VSL,
    pred_3_16_deaths_difference3_VSL = pred_3_16_deaths_difference3 * VSL,
    pred_3_8_deaths_difference3_VSL = pred_3_8_deaths_difference3 * VSL,
    
    # Differences scaled by VSL_low
    pred_1_16_deaths_difference1_VSL_low = pred_1_16_deaths_difference1 * VSL_low,
    pred_1_8_deaths_difference1_VSL_low = pred_1_8_deaths_difference1 * VSL_low,
    pred_2_16_deaths_difference1_VSL_low = pred_2_16_deaths_difference1 * VSL_low,
    pred_2_8_deaths_difference1_VSL_low = pred_2_8_deaths_difference1 * VSL_low,
    pred_3_16_deaths_difference1_VSL_low = pred_3_16_deaths_difference1 * VSL_low,
    pred_3_8_deaths_difference1_VSL_low = pred_3_8_deaths_difference1 * VSL_low,
    
    pred_1_16_deaths_difference2_VSL_low = pred_1_16_deaths_difference2 * VSL_low,
    pred_1_8_deaths_difference2_VSL_low = pred_1_8_deaths_difference2 * VSL_low,
    pred_2_16_deaths_difference2_VSL_low = pred_2_16_deaths_difference2 * VSL_low,
    pred_2_8_deaths_difference2_VSL_low = pred_2_8_deaths_difference2 * VSL_low,
    pred_3_16_deaths_difference2_VSL_low = pred_3_16_deaths_difference2 * VSL_low,
    pred_3_8_deaths_difference2_VSL_low = pred_3_8_deaths_difference2 * VSL_low,
    
    pred_1_16_deaths_difference3_VSL_low = pred_1_16_deaths_difference3 * VSL_low,
    pred_1_8_deaths_difference3_VSL_low = pred_1_8_deaths_difference3 * VSL_low,
    pred_2_16_deaths_difference3_VSL_low = pred_2_16_deaths_difference3 * VSL_low,
    pred_2_8_deaths_difference3_VSL_low = pred_2_8_deaths_difference3 * VSL_low,
    pred_3_16_deaths_difference3_VSL_low = pred_3_16_deaths_difference3 * VSL_low,
    pred_3_8_deaths_difference3_VSL_low = pred_3_8_deaths_difference3 * VSL_low,
    
    # Differences scaled by VSL_high
    pred_1_16_deaths_difference1_VSL_high = pred_1_16_deaths_difference1 * VSL_high,
    pred_1_8_deaths_difference1_VSL_high = pred_1_8_deaths_difference1 * VSL_high,
    pred_2_16_deaths_difference1_VSL_high = pred_2_16_deaths_difference1 * VSL_high,
    pred_2_8_deaths_difference1_VSL_high = pred_2_8_deaths_difference1 * VSL_high,
    pred_3_16_deaths_difference1_VSL_high = pred_3_16_deaths_difference1 * VSL_high,
    pred_3_8_deaths_difference1_VSL_high = pred_3_8_deaths_difference1 * VSL_high,
    
    pred_1_16_deaths_difference2_VSL_high = pred_1_16_deaths_difference2 * VSL_high,
    pred_1_8_deaths_difference2_VSL_high = pred_1_8_deaths_difference2 * VSL_high,
    pred_2_16_deaths_difference2_VSL_high = pred_2_16_deaths_difference2 * VSL_high,
    pred_2_8_deaths_difference2_VSL_high = pred_2_8_deaths_difference2 * VSL_high,
    pred_3_16_deaths_difference2_VSL_high = pred_3_16_deaths_difference2 * VSL_high,
    pred_3_8_deaths_difference2_VSL_high = pred_3_8_deaths_difference2 * VSL_high,
    
    pred_1_16_deaths_difference3_VSL_high = pred_1_16_deaths_difference3 * VSL_high,
    pred_1_8_deaths_difference3_VSL_high = pred_1_8_deaths_difference3 * VSL_high,
    pred_2_16_deaths_difference3_VSL_high = pred_2_16_deaths_difference3 * VSL_high,
    pred_2_8_deaths_difference3_VSL_high = pred_2_8_deaths_difference3 * VSL_high,
    pred_3_16_deaths_difference3_VSL_high = pred_3_16_deaths_difference3 * VSL_high,
    pred_3_8_deaths_difference3_VSL_high = pred_3_8_deaths_difference3 * VSL_high
  ) %>%
  mutate(
    pred_1_16_deaths_difference1_VSL_discounted = pred_1_16_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference1_VSL_discounted = pred_1_8_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference1_VSL_discounted = pred_2_16_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference1_VSL_discounted = pred_2_8_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference1_VSL_discounted = pred_3_16_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference1_VSL_discounted = pred_3_8_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference2_VSL_discounted = pred_1_16_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference2_VSL_discounted = pred_1_8_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference2_VSL_discounted = pred_2_16_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference2_VSL_discounted = pred_2_8_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference2_VSL_discounted = pred_3_16_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference2_VSL_discounted = pred_3_8_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference3_VSL_discounted = pred_1_16_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference3_VSL_discounted = pred_1_8_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference3_VSL_discounted = pred_2_16_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference3_VSL_discounted = pred_2_8_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference3_VSL_discounted = pred_3_16_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference3_VSL_discounted = pred_3_8_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference1_VSL_low_discounted = pred_1_16_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference1_VSL_low_discounted = pred_1_8_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference1_VSL_low_discounted = pred_2_16_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference1_VSL_low_discounted = pred_2_8_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference1_VSL_low_discounted = pred_3_16_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference1_VSL_low_discounted = pred_3_8_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference2_VSL_low_discounted = pred_1_16_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference2_VSL_low_discounted = pred_1_8_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference2_VSL_low_discounted = pred_2_16_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference2_VSL_low_discounted = pred_2_8_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference2_VSL_low_discounted = pred_3_16_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference2_VSL_low_discounted = pred_3_8_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference3_VSL_low_discounted = pred_1_16_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference3_VSL_low_discounted = pred_1_8_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference3_VSL_low_discounted = pred_2_16_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference3_VSL_low_discounted = pred_2_8_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference3_VSL_low_discounted = pred_3_16_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference3_VSL_low_discounted = pred_3_8_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference1_VSL_high_discounted = pred_1_16_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference1_VSL_high_discounted = pred_1_8_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference1_VSL_high_discounted = pred_2_16_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference1_VSL_high_discounted = pred_2_8_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference1_VSL_high_discounted = pred_3_16_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference1_VSL_high_discounted = pred_3_8_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference2_VSL_high_discounted = pred_1_16_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference2_VSL_high_discounted = pred_1_8_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference2_VSL_high_discounted = pred_2_16_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference2_VSL_high_discounted = pred_2_8_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference2_VSL_high_discounted = pred_3_16_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference2_VSL_high_discounted = pred_3_8_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
    
    pred_1_16_deaths_difference3_VSL_high_discounted = pred_1_16_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_1_8_deaths_difference3_VSL_high_discounted = pred_1_8_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_2_16_deaths_difference3_VSL_high_discounted = pred_2_16_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_2_8_deaths_difference3_VSL_high_discounted = pred_2_8_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_3_16_deaths_difference3_VSL_high_discounted = pred_3_16_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
    pred_3_8_deaths_difference3_VSL_high_discounted = pred_3_8_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023)
  
  )


# BOOTS -------

for (i in 1:1000) {
  
  # Step 1: Generate stochastic LPG consumption ------
  # Exclusive users
  exclusive_lpg_log_median <- log(24)       # Replace with your desired median
  exclusive_lpg_log_q1 <- log(14)          # Replace with your desired 25th percentile
  exclusive_lpg_log_q3 <- log(36*2.2)          # Replace with your desired 75th percentile
  exclusive_lpg_num_observations <- 32   # Replace with the desired number of observations
  
  exclusive_lpg_mu <- exclusive_lpg_log_median
  exclusive_lpg_sigma <- (exclusive_lpg_log_q3 - exclusive_lpg_log_q1) / (2 * qnorm(0.75) - 2 * qnorm(0.25))
  
  # Generate random data with the specified median, Q1, Q3
  # set.seed(123)  # For reproducibility
  exclusive_lpg_hypothetical_population <- rlnorm(
    exclusive_lpg_num_observations, 
    meanlog = exclusive_lpg_mu, 
    sdlog = exclusive_lpg_sigma)
  
  # Primary users
  primary_lpg_log_median <- log(14.4)       # Replace with your desired median
  primary_lpg_log_q1 <- log(10.4)         # Replace with your desired 25th percentile
  primary_lpg_log_q3 <- log(24.4*2.2)          # Replace with your desired 75th percentile
  primary_lpg_num_observations <- 316   # Replace with the desired number of observations
  primary_lpg_mu <- primary_lpg_log_median
  primary_lpg_sigma <- (primary_lpg_log_q3 - primary_lpg_log_q1) / (2 * qnorm(0.75) - 2 * qnorm(0.25))
  
  primary_lpg_hypothetical_population <- rlnorm(primary_lpg_num_observations, 
                                                meanlog = primary_lpg_mu, 
                                                sdlog = primary_lpg_sigma)
  
  # Secondary users
  secondary_lpg_log_median <- log(9)       # Replace with your desired median
  secondary_lpg_log_q1 <- log(6)       # Replace with your desired 25th percentile
  secondary_lpg_log_q3 <- log(14.4*2)           # Replace with your desired 75th percentile
  secondary_lpg_num_observations <- 504   # Replace with the desired number of observations
  
  # Calculate the mean and standard deviation for the normal distribution
  # based on the desired percentiles
  secondary_lpg_mu <- secondary_lpg_log_median
  secondary_lpg_sigma <- (secondary_lpg_log_q3 - secondary_lpg_log_q1) / (2 * qnorm(0.75) - 2 * qnorm(0.25))
  
  # Generate random data with the specified median, Q1, Q3
  # For reproducibility
  secondary_lpg_hypothetical_population <- rlnorm(
    secondary_lpg_num_observations, 
    meanlog = secondary_lpg_mu, 
    sdlog = secondary_lpg_sigma)
  
  
  combined_lpg_population <- secondary_lpg_hypothetical_population %>% 
    as_tibble() %>% 
    bind_rows(
      primary_lpg_hypothetical_population %>% 
        as_tibble()
    ) %>% 
    bind_rows(
      exclusive_lpg_hypothetical_population %>% 
        as_tibble()
    )
  
  
  # Step 2: Generate stochastic PM2.5 exposure curves -------
  # PM2.5 exposure curves -------
  min1 = rnorm(1, 35, 5)
  max1 = rnorm(1, 250, 25)
  decline1 = rnorm(1, 18, 2.5)
  decline2 = rnorm(1, 13, 2.5)
  exp1 = rnorm(1, 3, .05)
  exp2 = rnorm(1, 5, .1)
  
  x=1:55
  PM25_1=min1 + (max1-min1) / (1 + (x/decline1)^exp1)
  PM25_2=min1 + (max1-min1) / (1 + (x/decline2)^exp1)
  PM25_3=max1 - ((max1 - min1) / max(x))*x
  
  kg_lpg_pm25 <- tibble(
    kg_lpg = 1:55,
    PM25_1 = PM25_1,
    PM25_2 = PM25_2,
    PM25_3 = PM25_3
  )
  
  ggplot() +
    geom_line(data=kg_lpg_pm25, aes(x=kg_lpg, PM25_1)) +
    geom_line(data=kg_lpg_pm25, aes(x=kg_lpg, PM25_2), linetype="dotted") +
    geom_line(data=kg_lpg_pm25, aes(x=kg_lpg, PM25_3), linetype="dashed") +
    coord_cartesian(ylim=c(0, 300))
  
  
  
  # Price sensitivities ------
  
  # Among those that are exclusive LPG users
  sens3 = 4.101045/10 # >30 kg/capita/yr
  sens2 = 4.101045 # 15-30 kg/capita/yr
  sens1 = 4.101045*1.1 # <15 kg/capita/yr
  
  elasticity1 = rnorm(1, sens1, .025)
  elasticity2 = rnormTrunc(1, sens2, .5, 0.1, elasticity1)
  elasticity3 = rnormTrunc(1, sens3, .0025, 0.01, elasticity2)
  
  base_price = 1.49138
  price16 = base_price + base_price*.16 
  
  percent_price_change_16 = (price16 - base_price) / price16
  
  base_price = 1.49138
  price8 = base_price + base_price*.08
  
  percent_price_change_8 = (price8 - base_price) / price8
  
  # percent_price_change = 0.1379306
  
  combined_lpg_population1 <-
    combined_lpg_population %>%
    mutate(kg_lpg = value) %>%
    mutate(
      kg_lpg_1_16 =
        ifelse(kg_lpg < 15,
               kg_lpg -
                 (kg_lpg * (percent_price_change_16 * elasticity1)),
               ifelse(
                 kg_lpg >= 15 & kg_lpg < 30,
                 kg_lpg -
                   (kg_lpg * (percent_price_change_16 * elasticity2)),
                 ifelse(
                   kg_lpg >= 30 & kg_lpg < 45,
                   kg_lpg -
                     (kg_lpg * (percent_price_change_16 * elasticity3)),
                   ifelse(kg_lpg >= 45, kg_lpg, NA)
                 )
               )
        ),
      kg_lpg_1_8 =
        ifelse(kg_lpg < 15,
               kg_lpg -
                 (kg_lpg * (percent_price_change_8 * elasticity1)),
               ifelse(
                 kg_lpg >= 15 & kg_lpg < 30,
                 kg_lpg -
                   (kg_lpg * (percent_price_change_8 * elasticity2)),
                 ifelse(
                   kg_lpg >= 30 & kg_lpg < 45,
                   kg_lpg -
                     (kg_lpg * (percent_price_change_8 * elasticity3)),
                   ifelse(kg_lpg >= 45, kg_lpg, NA)
                 )
               )
        ),
      kg_lpg_2_16 = kg_lpg -
        (kg_lpg * (percent_price_change_16 * elasticity2)),
      kg_lpg_2_8 = kg_lpg -
        (kg_lpg * (percent_price_change_8 * elasticity2)),
      kg_lpg_3_16 = kg_lpg -
        (kg_lpg * (percent_price_change_16 * elasticity3)),
      kg_lpg_3_8 = kg_lpg -
        (kg_lpg * (percent_price_change_8 * elasticity3))
    ) %>%
    mutate(
      kg_lpg_1_16 = ifelse(kg_lpg_1_16 < 0, 0, kg_lpg_1_16),
      kg_lpg_1_8 = ifelse(kg_lpg_1_8 < 0, 0, kg_lpg_1_8),
      kg_lpg_2_16 = ifelse(kg_lpg_2_16 < 0, 0, kg_lpg_1_16),
      kg_lpg_2_8 = ifelse(kg_lpg_2_8 < 0, 0, kg_lpg_1_8),
      kg_lpg_3_16 = ifelse(kg_lpg_3_16 < 0, 0, kg_lpg_1_16),
      kg_lpg_3_8 = ifelse(kg_lpg_3_8 < 0, 0, kg_lpg_1_8),
    )
  
  
  # New distribution of LPG consumption -------
  
  
  lpg_densities_fig <- 
    ggplot() + 
    geom_density(data=combined_lpg_population1,
                 aes(x=round(kg_lpg, digits=0)), adjust=1.05, bw=10) +
    geom_density(data=combined_lpg_population1,
                 aes(x=round(kg_lpg_1_16, digits=0)), adjust=1.05, bw=10) + 
    geom_density(data=combined_lpg_population1,
                 aes(x=round(kg_lpg_1_8, digits=0)), adjust=1, bw=10, linetype="dashed") + 
    geom_density(data=combined_lpg_population1,
                 aes(x=round(kg_lpg_2_16, digits=0)), linetype="dotted", adjust=1, bw=10) + 
    geom_density(data=combined_lpg_population1,
                 aes(x=round(kg_lpg_2_8, digits=0)), linetype="twodash", adjust=1, bw=10) + 
    geom_density(data=combined_lpg_population1,
                 aes(x=round(kg_lpg_3_16, digits=0)), linetype="twodash", adjust=1, bw=10) + 
    geom_density(data=combined_lpg_population1,
                 aes(x=round(kg_lpg_3_8, digits=0)), linetype="twodash", adjust=1, bw=10) + 
    xlab("Yearly LPG consumption per capita") + 
    ylab("density") + 
    theme_clean() +
    theme(plot.background = element_blank(),
          plot.title = element_blank())
  
  
  specific_points <- seq(1, 55, 1)
  
  observed <- ggplot_build(lpg_densities_fig)$data[[1]]
  pred_2023_1_16 <- ggplot_build(lpg_densities_fig)$data[[2]]
  pred_2023_1_8 <- ggplot_build(lpg_densities_fig)$data[[3]]
  pred_2023_2_16 <- ggplot_build(lpg_densities_fig)$data[[4]]
  pred_2023_2_8 <- ggplot_build(lpg_densities_fig)$data[[5]]
  pred_2023_3_16 <- ggplot_build(lpg_densities_fig)$data[[6]]
  pred_2023_3_8 <- ggplot_build(lpg_densities_fig)$data[[7]]
  
  observed_density_values <- 
    approx(observed$x, observed$y, xout = specific_points)$y
  
  pred_2023_1_16_density_values <- 
    approx(pred_2023_1_16$x, pred_2023_1_16$y, xout = specific_points)$y
  pred_2023_1_8_density_values <- 
    approx(pred_2023_1_8$x, pred_2023_1_8$y, xout = specific_points)$y
  pred_2023_2_16_density_values <- 
    approx(pred_2023_2_16$x, pred_2023_2_16$y, xout = specific_points)$y
  pred_2023_2_8_density_values <- 
    approx(pred_2023_2_8$x, pred_2023_2_8$y, xout = specific_points)$y
  pred_2023_3_16_density_values <- 
    approx(pred_2023_3_16$x, pred_2023_3_16$y, xout = specific_points)$y
  pred_2023_3_8_density_values <- 
    approx(pred_2023_3_8$x, pred_2023_3_8$y, xout = specific_points)$y
  
  density_2019_2023 <- 
    bind_cols(
      specific_points,
      observed_density_values,
      pred_2023_1_16_density_values,
      pred_2023_1_8_density_values,
      pred_2023_2_16_density_values,
      pred_2023_2_8_density_values,
      pred_2023_3_16_density_values,
      pred_2023_3_8_density_values
    ) %>% 
    as_tibble() %>% 
    rename(
      kg_lpg = `...1`,
      observed_density_values = `...2`,
      pred_2023_1_16_density_values = `...3`,
      pred_2023_1_8_density_values = `...4`,
      pred_2023_2_16_density_values = `...5`,
      pred_2023_2_8_density_values = `...6`,
      pred_2023_3_16_density_values = `...7`,
      pred_2023_3_8_density_values = `...8`
    )  %>% 
    mutate(
      sum_observed_density_values = sum(observed_density_values, na.rm=T),
      sum_pred_2023_1_16_density_values = sum(pred_2023_1_16_density_values, na.rm=T),
      sum_pred_2023_2_16_density_values = sum(pred_2023_2_16_density_values, na.rm=T),
      sum_pred_2023_3_16_density_values = sum(pred_2023_3_16_density_values, na.rm=T),
      sum_pred_2023_1_8_density_values = sum(pred_2023_1_8_density_values, na.rm=T),
      sum_pred_2023_2_8_density_values = sum(pred_2023_2_8_density_values, na.rm=T),
      sum_pred_2023_3_8_density_values = sum(pred_2023_3_8_density_values, na.rm=T),
    ) %>% 
    mutate(
      ratio_sum_observed_density_values = 1/sum_observed_density_values,
      ratio_sum_pred_2023_1_16_density_values = 1/sum_pred_2023_1_16_density_values,
      ratio_sum_pred_2023_2_16_density_values = 1/sum_pred_2023_2_16_density_values,
      ratio_sum_pred_2023_3_16_density_values = 1/sum_pred_2023_3_16_density_values,
      
      ratio_sum_pred_2023_1_8_density_values = 1/sum_pred_2023_1_8_density_values,
      ratio_sum_pred_2023_2_8_density_values = 1/sum_pred_2023_2_8_density_values,
      ratio_sum_pred_2023_3_8_density_values = 1/sum_pred_2023_3_8_density_values
    ) %>% 
    mutate(
      observed_density_values_1=observed_density_values*ratio_sum_observed_density_values,
      
      pred_2023_1_16_density_values_1=pred_2023_1_16_density_values*ratio_sum_pred_2023_1_16_density_values,
      pred_2023_2_16_density_values_2=pred_2023_2_16_density_values*ratio_sum_pred_2023_2_16_density_values,
      pred_2023_3_16_density_values_3=pred_2023_3_16_density_values*ratio_sum_pred_2023_3_16_density_values,
      
      pred_2023_1_8_density_values_1=pred_2023_1_8_density_values*ratio_sum_pred_2023_1_8_density_values,
      pred_2023_2_8_density_values_2=pred_2023_2_8_density_values*ratio_sum_pred_2023_2_8_density_values,
      pred_2023_3_8_density_values_3=pred_2023_3_8_density_values*ratio_sum_pred_2023_3_8_density_values
    )
  
  
  # write_rds(density_2019_2023, "~/Downloads/kenya_kg_vat_density_2019_2023.rds")
  
  # COMBINE KG LPG DENSITIES WITH PM25 SCENARIOS ------
  kenya_kg_lpg_pm <- 
    density_2019_2023 %>% 
    left_join(kg_lpg_pm25)
  
  # GENERATE FULL YEAR BY YEAR SCENARIOS --------
  
  # 31.2% of Kenyans do not use any LPG
  
  kenya_kg_pm_scenario <- 
    expand_grid(2023:2030,
                kenya_kg_lpg_pm) %>%
    rename(Year = `2023:2030`) %>%
    mutate(VSL = 230000,
           VSL_low = 230000*2/3,
           VSL_high = 230000*4/3) %>%
    left_join(kenya_pop) %>%
    mutate(lpg_fraction = (1-.312) * population) %>%
    mutate(
      observed_pop = lpg_fraction * observed_density_values_1,
      
      pred_1_16_pop = lpg_fraction * pred_2023_1_16_density_values_1,
      pred_2_16_pop = lpg_fraction * pred_2023_2_16_density_values_2,
      pred_3_16_pop = lpg_fraction * pred_2023_3_16_density_values_3,
      
      pred_1_8_pop = lpg_fraction * pred_2023_1_8_density_values_1,
      pred_2_8_pop = lpg_fraction * pred_2023_2_8_density_values_2,
      pred_3_8_pop = lpg_fraction * pred_2023_3_8_density_values_3,
      
    )
  
  
  
  # CALCULATE DAMAGES ------
  
  kenya_kg_pm_scenario_1 <-
    kenya_kg_pm_scenario %>%
    mutate(PM25_1 = PM25_1 - 2.4,
           PM25_2 = PM25_2 - 2.4,
           PM25_3 = PM25_3 - 2.4) %>%
    mutate(rho1 = log(1 + (PM25_1 / 1.6)) / (1 + exp((15.5 - PM25_1) / 36.8)),
           rho2 = log(1 + (PM25_2 / 1.6)) / (1 + exp((15.5 - PM25_2) / 36.8)),
           rho3 = log(1 + (PM25_3 / 1.6)) / (1 + exp((15.5 - PM25_3) / 36.8))) %>%
    mutate(hazard1 = (0.1430) * rho1,
           hazard2 = (0.1430) * rho2,
           hazard3 = (0.1430) * rho3) %>%
    mutate(deathrate1 = (1 - (1 / exp(hazard1))) * death_rate,
           deathrate2 = (1 - (1 / exp(hazard2))) * death_rate,
           deathrate3 = (1 - (1 / exp(hazard3))) * death_rate) %>%
    mutate(
      observed_deaths1 = (deathrate1 * observed_pop) / 1000,
      
      pred_1_16_deaths1 = (deathrate1 * pred_1_16_pop) / 1000,
      pred_2_16_deaths1 = (deathrate1 * pred_2_16_pop) / 1000,
      pred_3_16_deaths1 = (deathrate1 * pred_3_16_pop) / 1000,
      
      pred_1_8_deaths1 = (deathrate1 * pred_1_8_pop) / 1000,
      pred_2_8_deaths1 = (deathrate1 * pred_2_8_pop) / 1000,
      pred_3_8_deaths1 = (deathrate1 * pred_3_8_pop) / 1000,
      
      observed_deaths2 = (deathrate2 * observed_pop) / 1000,
      pred_1_16_deaths2 = (deathrate2 * pred_1_16_pop) / 1000,
      pred_2_16_deaths2 = (deathrate2 * pred_2_16_pop) / 1000,
      pred_3_16_deaths2 = (deathrate2 * pred_3_16_pop) / 1000,
      
      pred_1_8_deaths2 = (deathrate2 * pred_1_8_pop) / 1000,
      pred_2_8_deaths2 = (deathrate2 * pred_2_8_pop) / 1000,
      pred_3_8_deaths2 = (deathrate2 * pred_3_8_pop) / 1000,
      
      observed_deaths3 = (deathrate3 * observed_pop) / 1000,
      pred_1_16_deaths3 = (deathrate3 * pred_1_16_pop) / 1000,
      pred_2_16_deaths3 = (deathrate3 * pred_2_16_pop) / 1000,
      pred_3_16_deaths3 = (deathrate3 * pred_3_16_pop) / 1000,
      
      pred_1_8_deaths3 = (deathrate3 * pred_1_8_pop) / 1000,
      pred_2_8_deaths3 = (deathrate3 * pred_2_8_pop) / 1000,
      pred_3_8_deaths3 = (deathrate3 * pred_3_8_pop) / 1000,
      
    ) %>% 
    mutate(
      kg_lpg_consumed_observed = kg_lpg*observed_pop,
      kg_lpg_consumed_1_16 = kg_lpg*pred_1_16_pop,
      kg_lpg_consumed_2_16 = kg_lpg*pred_2_16_pop,
      kg_lpg_consumed_3_16 = kg_lpg*pred_3_16_pop,
      kg_lpg_consumed_1_8 = kg_lpg*pred_1_8_pop,
      kg_lpg_consumed_2_8 = kg_lpg*pred_2_8_pop,
      kg_lpg_consumed_3_8 = kg_lpg*pred_3_8_pop,
    )
  
  kenya_kg_pm_scenario_1_yr <-
    kenya_kg_pm_scenario_1 %>%
    fgroup_by(Year) %>% 
    fsummarise(
      observed_pop = fmean(observed_pop, na.rm=T),
      
      pred_1_16_pop = fmean(pred_1_16_pop, na.rm=T),
      pred_2_16_pop = fmean(pred_2_16_pop, na.rm=T),
      pred_3_16_pop = fmean(pred_3_16_pop, na.rm=T),
      
      pred_1_8_pop = fmean(pred_1_8_pop, na.rm=T),
      pred_2_8_pop = fmean(pred_2_8_pop, na.rm=T),
      pred_3_8_pop = fmean(pred_3_8_pop, na.rm=T),
      
      PM25_1_observed = fmean(PM25_1, na.rm=T, w=observed_pop),
      PM25_2_observed = fmean(PM25_2, na.rm=T, w=observed_pop),
      PM25_3_observed = fmean(PM25_3, na.rm=T, w=observed_pop),
      
      PM25_1_16 = fmean(PM25_1, na.rm=T, w=pred_1_16_pop),
      PM25_2_16 = fmean(PM25_2, na.rm=T, w=pred_1_16_pop),
      PM25_3_16 = fmean(PM25_3, na.rm=T, w=pred_1_16_pop),
      
      PM25_1_8 = fmean(PM25_1, na.rm=T, w=pred_1_8_pop),
      PM25_2_8 = fmean(PM25_2, na.rm=T, w=pred_1_8_pop),
      PM25_3_8 = fmean(PM25_3, na.rm=T, w=pred_1_8_pop),
      
      kg_lpg_consumed_observed = fsum(kg_lpg_consumed_observed),
      kg_lpg_consumed_1_16 = fsum(kg_lpg_consumed_1_16),
      kg_lpg_consumed_2_16 = fsum(kg_lpg_consumed_2_16),
      kg_lpg_consumed_3_16 = fsum(kg_lpg_consumed_3_16),
      kg_lpg_consumed_1_8 = fsum(kg_lpg_consumed_1_8),
      kg_lpg_consumed_2_8 = fsum(kg_lpg_consumed_2_8),
      kg_lpg_consumed_3_8 = fsum(kg_lpg_consumed_3_8),
      
      VSL = min(VSL, na.rm=T),
      VSL_low = min(VSL_low, na.rm=T),
      VSL_high = min(VSL_high, na.rm=T),
      
      observed_deaths1 = fsum(observed_deaths1),
      pred_1_16_deaths1 = fsum(pred_1_16_deaths1),
      pred_2_16_deaths1 = fsum(pred_2_16_deaths1),
      pred_3_16_deaths1 = fsum(pred_3_16_deaths1),
      
      pred_1_8_deaths1 = fsum(pred_1_8_deaths1),
      pred_2_8_deaths1 = fsum(pred_2_8_deaths1),
      pred_3_8_deaths1 = fsum(pred_3_8_deaths1),
      
      observed_deaths2 = fsum(observed_deaths2),
      pred_1_16_deaths2 = fsum(pred_1_16_deaths2),
      pred_2_16_deaths2 = fsum(pred_2_16_deaths2),
      pred_3_16_deaths2 = fsum(pred_3_16_deaths2),
      
      pred_1_8_deaths2 = fsum(pred_1_8_deaths2),
      pred_2_8_deaths2 = fsum(pred_2_8_deaths2),
      pred_3_8_deaths2 = fsum(pred_3_8_deaths2),
      
      observed_deaths3 = fsum(observed_deaths3),
      pred_1_16_deaths3 = fsum(pred_1_16_deaths3),
      pred_2_16_deaths3 = fsum(pred_2_16_deaths3),
      pred_3_16_deaths3 = fsum(pred_3_16_deaths3),
      
      pred_1_8_deaths3 = fsum(pred_1_8_deaths3),
      pred_2_8_deaths3 = fsum(pred_2_8_deaths3),
      pred_3_8_deaths3 = fsum(pred_3_8_deaths3)
    ) %>%
    mutate(
      pred_1_16_deaths_difference1 = observed_deaths1 - pred_1_16_deaths1,
      pred_1_8_deaths_difference1 = observed_deaths1 - pred_1_8_deaths1,
      pred_2_16_deaths_difference1 = observed_deaths1 - pred_2_16_deaths1,
      pred_2_8_deaths_difference1 = observed_deaths1 - pred_2_8_deaths1,
      pred_3_16_deaths_difference1 = observed_deaths1 - pred_2_16_deaths1,
      pred_3_8_deaths_difference1 = observed_deaths1 - pred_2_8_deaths1,
      
      pred_1_16_deaths_difference2 = observed_deaths2 - pred_1_16_deaths2,
      pred_1_8_deaths_difference2 = observed_deaths2 - pred_1_8_deaths2,
      pred_2_16_deaths_difference2 = observed_deaths2 - pred_2_16_deaths2,
      pred_2_8_deaths_difference2 = observed_deaths2 - pred_2_8_deaths2,
      pred_3_16_deaths_difference2 = observed_deaths2 - pred_2_16_deaths2,
      pred_3_8_deaths_difference2 = observed_deaths2 - pred_2_8_deaths2,
      
      pred_1_16_deaths_difference3 = observed_deaths3 - pred_1_16_deaths3,
      pred_1_8_deaths_difference3 = observed_deaths3 - pred_1_8_deaths3,
      pred_2_16_deaths_difference3 = observed_deaths3 - pred_2_16_deaths3,
      pred_2_8_deaths_difference3 = observed_deaths3 - pred_2_8_deaths3,
      pred_3_16_deaths_difference3 = observed_deaths3 - pred_2_16_deaths3,
      pred_3_8_deaths_difference3 = observed_deaths3 - pred_2_8_deaths3
    ) %>%
    mutate(
      # Differences scaled by VSL
      pred_1_16_deaths_difference1_VSL = pred_1_16_deaths_difference1 * VSL,
      pred_1_8_deaths_difference1_VSL = pred_1_8_deaths_difference1 * VSL,
      pred_2_16_deaths_difference1_VSL = pred_2_16_deaths_difference1 * VSL,
      pred_2_8_deaths_difference1_VSL = pred_2_8_deaths_difference1 * VSL,
      pred_3_16_deaths_difference1_VSL = pred_3_16_deaths_difference1 * VSL,
      pred_3_8_deaths_difference1_VSL = pred_3_8_deaths_difference1 * VSL,
      
      pred_1_16_deaths_difference2_VSL = pred_1_16_deaths_difference2 * VSL,
      pred_1_8_deaths_difference2_VSL = pred_1_8_deaths_difference2 * VSL,
      pred_2_16_deaths_difference2_VSL = pred_2_16_deaths_difference2 * VSL,
      pred_2_8_deaths_difference2_VSL = pred_2_8_deaths_difference2 * VSL,
      pred_3_16_deaths_difference2_VSL = pred_3_16_deaths_difference2 * VSL,
      pred_3_8_deaths_difference2_VSL = pred_3_8_deaths_difference2 * VSL,
      
      pred_1_16_deaths_difference3_VSL = pred_1_16_deaths_difference3 * VSL,
      pred_1_8_deaths_difference3_VSL = pred_1_8_deaths_difference3 * VSL,
      pred_2_16_deaths_difference3_VSL = pred_2_16_deaths_difference3 * VSL,
      pred_2_8_deaths_difference3_VSL = pred_2_8_deaths_difference3 * VSL,
      pred_3_16_deaths_difference3_VSL = pred_3_16_deaths_difference3 * VSL,
      pred_3_8_deaths_difference3_VSL = pred_3_8_deaths_difference3 * VSL,
      
      # Differences scaled by VSL_low
      pred_1_16_deaths_difference1_VSL_low = pred_1_16_deaths_difference1 * VSL_low,
      pred_1_8_deaths_difference1_VSL_low = pred_1_8_deaths_difference1 * VSL_low,
      pred_2_16_deaths_difference1_VSL_low = pred_2_16_deaths_difference1 * VSL_low,
      pred_2_8_deaths_difference1_VSL_low = pred_2_8_deaths_difference1 * VSL_low,
      pred_3_16_deaths_difference1_VSL_low = pred_3_16_deaths_difference1 * VSL_low,
      pred_3_8_deaths_difference1_VSL_low = pred_3_8_deaths_difference1 * VSL_low,
      
      pred_1_16_deaths_difference2_VSL_low = pred_1_16_deaths_difference2 * VSL_low,
      pred_1_8_deaths_difference2_VSL_low = pred_1_8_deaths_difference2 * VSL_low,
      pred_2_16_deaths_difference2_VSL_low = pred_2_16_deaths_difference2 * VSL_low,
      pred_2_8_deaths_difference2_VSL_low = pred_2_8_deaths_difference2 * VSL_low,
      pred_3_16_deaths_difference2_VSL_low = pred_3_16_deaths_difference2 * VSL_low,
      pred_3_8_deaths_difference2_VSL_low = pred_3_8_deaths_difference2 * VSL_low,
      
      pred_1_16_deaths_difference3_VSL_low = pred_1_16_deaths_difference3 * VSL_low,
      pred_1_8_deaths_difference3_VSL_low = pred_1_8_deaths_difference3 * VSL_low,
      pred_2_16_deaths_difference3_VSL_low = pred_2_16_deaths_difference3 * VSL_low,
      pred_2_8_deaths_difference3_VSL_low = pred_2_8_deaths_difference3 * VSL_low,
      pred_3_16_deaths_difference3_VSL_low = pred_3_16_deaths_difference3 * VSL_low,
      pred_3_8_deaths_difference3_VSL_low = pred_3_8_deaths_difference3 * VSL_low,
      
      # Differences scaled by VSL_high
      pred_1_16_deaths_difference1_VSL_high = pred_1_16_deaths_difference1 * VSL_high,
      pred_1_8_deaths_difference1_VSL_high = pred_1_8_deaths_difference1 * VSL_high,
      pred_2_16_deaths_difference1_VSL_high = pred_2_16_deaths_difference1 * VSL_high,
      pred_2_8_deaths_difference1_VSL_high = pred_2_8_deaths_difference1 * VSL_high,
      pred_3_16_deaths_difference1_VSL_high = pred_3_16_deaths_difference1 * VSL_high,
      pred_3_8_deaths_difference1_VSL_high = pred_3_8_deaths_difference1 * VSL_high,
      
      pred_1_16_deaths_difference2_VSL_high = pred_1_16_deaths_difference2 * VSL_high,
      pred_1_8_deaths_difference2_VSL_high = pred_1_8_deaths_difference2 * VSL_high,
      pred_2_16_deaths_difference2_VSL_high = pred_2_16_deaths_difference2 * VSL_high,
      pred_2_8_deaths_difference2_VSL_high = pred_2_8_deaths_difference2 * VSL_high,
      pred_3_16_deaths_difference2_VSL_high = pred_3_16_deaths_difference2 * VSL_high,
      pred_3_8_deaths_difference2_VSL_high = pred_3_8_deaths_difference2 * VSL_high,
      
      pred_1_16_deaths_difference3_VSL_high = pred_1_16_deaths_difference3 * VSL_high,
      pred_1_8_deaths_difference3_VSL_high = pred_1_8_deaths_difference3 * VSL_high,
      pred_2_16_deaths_difference3_VSL_high = pred_2_16_deaths_difference3 * VSL_high,
      pred_2_8_deaths_difference3_VSL_high = pred_2_8_deaths_difference3 * VSL_high,
      pred_3_16_deaths_difference3_VSL_high = pred_3_16_deaths_difference3 * VSL_high,
      pred_3_8_deaths_difference3_VSL_high = pred_3_8_deaths_difference3 * VSL_high
    ) %>%
    mutate(
      pred_1_16_deaths_difference1_VSL_discounted = pred_1_16_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference1_VSL_discounted = pred_1_8_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference1_VSL_discounted = pred_2_16_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference1_VSL_discounted = pred_2_8_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference1_VSL_discounted = pred_3_16_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference1_VSL_discounted = pred_3_8_deaths_difference1_VSL / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference2_VSL_discounted = pred_1_16_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference2_VSL_discounted = pred_1_8_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference2_VSL_discounted = pred_2_16_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference2_VSL_discounted = pred_2_8_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference2_VSL_discounted = pred_3_16_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference2_VSL_discounted = pred_3_8_deaths_difference2_VSL / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference3_VSL_discounted = pred_1_16_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference3_VSL_discounted = pred_1_8_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference3_VSL_discounted = pred_2_16_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference3_VSL_discounted = pred_2_8_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference3_VSL_discounted = pred_3_16_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference3_VSL_discounted = pred_3_8_deaths_difference3_VSL / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference1_VSL_low_discounted = pred_1_16_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference1_VSL_low_discounted = pred_1_8_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference1_VSL_low_discounted = pred_2_16_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference1_VSL_low_discounted = pred_2_8_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference1_VSL_low_discounted = pred_3_16_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference1_VSL_low_discounted = pred_3_8_deaths_difference1_VSL_low / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference2_VSL_low_discounted = pred_1_16_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference2_VSL_low_discounted = pred_1_8_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference2_VSL_low_discounted = pred_2_16_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference2_VSL_low_discounted = pred_2_8_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference2_VSL_low_discounted = pred_3_16_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference2_VSL_low_discounted = pred_3_8_deaths_difference2_VSL_low / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference3_VSL_low_discounted = pred_1_16_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference3_VSL_low_discounted = pred_1_8_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference3_VSL_low_discounted = pred_2_16_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference3_VSL_low_discounted = pred_2_8_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference3_VSL_low_discounted = pred_3_16_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference3_VSL_low_discounted = pred_3_8_deaths_difference3_VSL_low / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference1_VSL_high_discounted = pred_1_16_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference1_VSL_high_discounted = pred_1_8_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference1_VSL_high_discounted = pred_2_16_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference1_VSL_high_discounted = pred_2_8_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference1_VSL_high_discounted = pred_3_16_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference1_VSL_high_discounted = pred_3_8_deaths_difference1_VSL_high / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference2_VSL_high_discounted = pred_1_16_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference2_VSL_high_discounted = pred_1_8_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference2_VSL_high_discounted = pred_2_16_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference2_VSL_high_discounted = pred_2_8_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference2_VSL_high_discounted = pred_3_16_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference2_VSL_high_discounted = pred_3_8_deaths_difference2_VSL_high / (1 + 0.05) ^ (Year - 2023),
      
      pred_1_16_deaths_difference3_VSL_high_discounted = pred_1_16_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_1_8_deaths_difference3_VSL_high_discounted = pred_1_8_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_2_16_deaths_difference3_VSL_high_discounted = pred_2_16_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_2_8_deaths_difference3_VSL_high_discounted = pred_2_8_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_3_16_deaths_difference3_VSL_high_discounted = pred_3_16_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023),
      pred_3_8_deaths_difference3_VSL_high_discounted = pred_3_8_deaths_difference3_VSL_high / (1 + 0.05) ^ (Year - 2023)
      
    )
  
  kenya_kg_pm_scenario_1_yr_long <- 
    kenya_kg_pm_scenario_1_yr_long %>% 
    bind_rows(kenya_kg_pm_scenario_1_yr %>% 
                mutate(model=i))
  
  print(i)
  
  }


write_rds(kenya_kg_pm_scenario_1_yr_long, "~/Downloads/kenya_kg_pm_scenario_1_yr_long.rds")






# Boot PM2.5 <> KG --------

set.seed(24)



for (i in 1:1000) {
  
  min1 = rnorm(1, 35, 5)
  max1 = rnorm(1, 250, 25)
  decline1 = rnorm(1, 18, 2.5)
  decline2 = rnorm(1, 13, 2.5)
  exp1 = rnorm(1, 3, .05)
  exp2 = rnorm(1, 5, .1)
  
  x=1:55
  PM25_1=min1 + (max1-min1) / (1 + (x/decline1)^exp1)
  PM25_2=min1 + (max1-min1) / (1 + (x/decline2)^exp1)
  PM25_3=max1 - ((max1 - min1) / max(x))*x
  
  kg_lpg_pm25 <- tibble(
    kg_lpg = 1:55,
    PM25_1 = PM25_1,
    PM25_2 = PM25_2,
    PM25_3 = PM25_3,
    model=i
  )
  
  kg_lpg_pm25_long <- 
    kg_lpg_pm25_long %>% 
    bind_rows(
      kg_lpg_pm25
    )
  
  print(i)
}

write_rds(kg_lpg_pm25_long, "~/Downloads/kenya_kg_lpg_pm25_long.rds")



